Configure all file paths based on the Rmarkdown parameter stage .
# things that never become expressed or bound
# grep -f L3.classD.wbid L1.classD.wbid > L1L3.class.wbid
# grep -f L1L3.class.wbid LE.classD.wbid > LEL1L3.classQ.wbid
classQ.wbid = read.table("../03_output/LEL1L3.classQ.wbid")[[1]]
rob.dir = normalizePath('~/work/ELT-2-ChIP-revision/Rob/03_emb_L1_L3_intestine_RNAseq/03_output')
rob.files = list(LE='res_embryoGFPplus_vs_embryoGFPminus_apeglm.csv',
L1='res_L1GFPplus_vs_L1GFPminus_apeglm.csv',
L3='res_L3GFPplus_vs_L3GFPminus_apeglm.csv')
RNASEQ = file.path(rob.dir, rob.files[[params$stage]])
UPSTREAM=1000
DOWNSTREAM=200
IDR_BED = sprintf("../01_input/ELT2_%s_combined_IDR.bed", params$stage) # peaks input file
OUTPUT_03 = normalizePath("../03_output")
# output files from genomic ranges
PROMOTOR_BED_PATH = sprintf("%s/filtered.promoters.minus%d_plus%d.bed",
OUTPUT_03,
UPSTREAM,
DOWNSTREAM)
# colliding promoters removed
NR_PROMOTOR_BED_PATH = sprintf("%s/nr.promoters.minus%d_plus%d.bed",
OUTPUT_03,
UPSTREAM,
DOWNSTREAM)
# input signal file for wiggle tool step
SIGNAL_BW = sprintf("../01_input/ELT2_%s_combined_subtracted.bw", params$stage)
INTERP_SIGNAL_BW = sprintf("../01_input/ELT2_%s_combined_subtracted.interp.bw", params$stage)
# output files from wiggle tool step
PROMOTOR_DF_PATH = sprintf("%s/%s.filtered.promoters.minus%d_plus%d.df",
OUTPUT_03,
params$stage,
UPSTREAM,
DOWNSTREAM)
NR_PROMOTOR_DF_PATH = sprintf("%s/%s.nr.promoters.minus%d_plus%d.df",
OUTPUT_03,
params$stage,
UPSTREAM,
DOWNSTREAM)
# Log conversion
LOG_PROMOTOR_DF_PATH = sprintf("%s/log.%s.filtered.promoters.minus%d_plus%d.df",
OUTPUT_03,
params$stage,
UPSTREAM,
DOWNSTREAM)
IDR_DF = sprintf("../01_input/ELT2_%s_combined_IDR.df", params$stage) # peaks with signal agg
## Using saved promoter query.
## GRanges object with 6 ranges and 2 metadata columns:
## seqnames ranges strand | wbps_gene_id external_gene_id
## <Rle> <IRanges> <Rle> | <character> <character>
## [1] chrI 10031-11230 - | WBGene00022277 homt-1
## [2] chrI 10495-11694 + | WBGene00022276 nlp-40
## [3] chrI 26582-27781 - | WBGene00022278 rcor-1
## [4] chrI 32951-34150 - | WBGene00022279 sesn-1
## [5] chrI 42733-43932 + | WBGene00022275 txt-7
## [6] chrI 46461-47660 + | WBGene00044345 Y48G1C.12
## -------
## seqinfo: 7 sequences (1 circular) from ce11 genome
Install a conda environment containing wiggletools and ucsc user apps via root/David/01_promoters/02_scripts/conda_envs/elt-2-rev.yaml
Sys.setenv(PROMOTOR_BED_PATH=PROMOTOR_BED_PATH, # all promoters
NR_PROMOTOR_BED_PATH = NR_PROMOTOR_BED_PATH, # overlapping removed
IDR_BED = IDR_BED,
IDR_DF = IDR_DF,
SIGNAL_BW = SIGNAL_BW,
PROMOTOR_DF_PATH = PROMOTOR_DF_PATH,
NR_PROMOTOR_DF_PATH = NR_PROMOTOR_DF_PATH,
STAGE=params$stage
)
Run wiggletools in a bash session.
Read in the results of the wiggletools commands.
promoters.agg = read.table(PROMOTOR_DF_PATH)
colnames(promoters.agg) <- c("chrom", "start","end","len", "strand", "wbps_gene_id", "gene_name", "chip_signal_mean", "chip_signal_max")
IDR_peaks.agg = read.table(IDR_DF)
IDR_peaks.agg$V4 = NULL
IDR_peaks.agg$V5 = NULL
IDR_peaks.agg$V6 = NULL
IDR_peaks.agg$V8 = NULL
colnames(IDR_peaks.agg) <- c("chrom", "start","end","intensity","nlogq","offset","signal.mean","signal.max")
gr.IDR = makeGRangesFromDataFrame(IDR_peaks.agg,keep.extra.columns = T)
seqinfo(gr.IDR) <- Seqinfo(genome="ce11")
gr.IDR$signal.sum = width(gr.IDR )*gr.IDR$signal.mean
gr.promoters = makeGRangesFromDataFrame(promoters.agg,keep.extra.columns = T)
seqinfo(gr.promoters) <- Seqinfo(genome="ce11")
Attach log scale promoter signal values.
chipmean.minval = min(gr.promoters$chip_signal_mean,na.rm=T)
chipmean.minval
## [1] -149
chipmax.minval = min(gr.promoters$chip_signal_max,na.rm=T)
chipmax.minval
## [1] -91.3
chipmean.log = log(-chipmean.minval + 1 + gr.promoters$chip_signal_mean,base=2)
chipmax.log = log(-chipmax.minval + 1 + gr.promoters$chip_signal_max,base=2)
gr.promoters$log_chip_signal_mean = chipmean.log
gr.promoters$log_chip_signal_max = chipmax.log
head(gr.promoters)
## GRanges object with 6 ranges and 7 metadata columns:
## seqnames ranges strand | len wbps_gene_id gene_name
## <Rle> <IRanges> <Rle> | <integer> <character> <character>
## [1] chrI 10031-11230 - | 1200 WBGene00022277 homt-1
## [2] chrI 10495-11694 + | 1200 WBGene00022276 nlp-40
## [3] chrI 26582-27781 - | 1200 WBGene00022278 rcor-1
## [4] chrI 32951-34150 - | 1200 WBGene00022279 sesn-1
## [5] chrI 42733-43932 + | 1200 WBGene00022275 txt-7
## [6] chrI 46461-47660 + | 1200 WBGene00044345 Y48G1C.12
## chip_signal_mean chip_signal_max log_chip_signal_mean log_chip_signal_max
## <numeric> <numeric> <numeric> <numeric>
## [1] 97.7057 510.4876 7.95330 9.23555
## [2] 313.8040 643.6636 8.85781 9.52353
## [3] 51.6907 106.8574 7.65700 7.63790
## [4] 28.0552 38.9742 7.47732 7.03664
## [5] 32.2047 89.4033 7.51053 7.50559
## [6] 17.3023 26.3194 7.38752 6.89042
## -------
## seqinfo: 7 sequences (1 circular) from ce11 genome
# output file
LOG_PROMOTOR_DF_PATH = sprintf("%s/log_%s_filtered.promoters.minus%d_plus%d.df", OUTPUT_03, params$stage, UPSTREAM, DOWNSTREAM)
write.table(as.data.frame(gr.promoters), file = LOG_PROMOTOR_DF_PATH,quote=F, row.names=F,sep="\t")
Find overlaps between promoters and IDR peaks. Populate IDR signal fields when a peak exists, leave NaN otherwise.
laps = findOverlaps(gr.promoters,gr.IDR, ignore.strand=T,minoverlap = 100)
sprintf("%d/%d (%.1f) promoters have a peak.", length(unique(from(laps))), laps@nLnode,
length(unique(from(laps)))/ laps@nLnode)
## [1] "5043/19985 (0.3) promoters have a peak."
sprintf("%d/%d (%.1f) peaks are in promoters", length(unique(to(laps))), laps@nRnode,
length(unique(to(laps)))/ laps@nRnode)
## [1] "4608/9198 (0.5) peaks are in promoters"
gr.promoters$IDR_mean = NaN
gr.promoters$IDR_max = NaN
gr.promoters$IDR_value = NaN
gr.promoters$nlogq = NaN
gr.promoters$IDR_sum = NaN
gr.promoters[from(laps)]$IDR_max = gr.IDR[to(laps)]$signal.max
gr.promoters[from(laps)]$IDR_mean = gr.IDR[to(laps)]$signal.mean
gr.promoters[from(laps)]$IDR_value = gr.IDR[to(laps)]$intensity
gr.promoters[from(laps)]$IDR_sum = gr.IDR[to(laps)]$signal.sum
gr.promoters[from(laps)]$nlogq = gr.IDR[to(laps)]$nlogq
print("Number of promoters overlapping an IDR peak:")
## [1] "Number of promoters overlapping an IDR peak:"
sum(!is.nan(gr.promoters$IDR_max))
## [1] 5043
# the density/distribution of the non-transformed values is too thin to
# show on the graph, so this is commented out. If you want to see the distribution,
# uncomment the ecdf plots.
idr.nonlog = as.data.frame(gr.promoters) %>% filter(is.finite(IDR_value)) %>% dplyr::select('IDR_value','IDR_mean','IDR_max','IDR_sum') %>% gather(key="dataset")
# ggplot(idr.nonlog, aes(x=value, fill=dataset)) + geom_density(alpha=.5) + labs(title="Distributions of NON-Log transformed ChIP signal values in peaks")
#
# idr.val.ecdf = ecdf(gr.promoters$IDR_value)
# idr.mean.ecdf = ecdf(gr.promoters$IDR_mean)
# idr.max.ecdf = ecdf(gr.promoters$IDR_max)
#
# plot(idr.val.ecdf)
# lines(idr.mean.ecdf,col="blue")
# lines(idr.max.ecdf,col="red")
# the data currently have all positive values, so no adjustment made for log
idr.val.log = log10(gr.promoters$IDR_value)
idr.mean.log = log10(gr.promoters$IDR_mean)
idr.max.log = log10(gr.promoters$IDR_max)
idr.sum.log = log10(gr.promoters$IDR_sum)
# idr.val.log.ecdf = ecdf(idr.val.log)
# idr.mean.log.ecdf = ecdf(idr.mean.log)
# idr.max.log.ecdf = ecdf(idr.max.log)
#
# plot(idr.val.log.ecdf)
# lines(idr.mean.log.ecdf,col="blue")
# lines(idr.max.log.ecdf,col="red")
log.vals = data.frame(idr.mean = idr.mean.log, idr.val = idr.val.log, idr.max = idr.max.log, idr.sum = idr.sum.log)
long.log.vals = gather(log.vals, key="dataset")
head(long.log.vals)
ggplot(long.log.vals, aes(x=value, fill=dataset)) + geom_density(alpha=.5) + labs(title=sprintf("Distributions of log10 transformed IDR values for %s",params$stage))
## Warning: Removed 59768 rows containing non-finite values (stat_density).
gr.promoters$IDR_logTEN_max = idr.max.log
gr.promoters$IDR_logTEN_mean = idr.mean.log
gr.promoters$IDR_logTEN_value = idr.val.log
gr.promoters$IDR_logTEN_sum = idr.sum.log
extremes = gr.promoters$IDR_logTEN_sum > 5
extremes[is.na(extremes)] <- FALSE
gr.extremes = gr.promoters[extremes]
gr.extremes = gr.extremes[order(gr.extremes$IDR_logTEN_sum, decreasing = T)]
head(gr.extremes)
## GRanges object with 6 ranges and 16 metadata columns:
## seqnames ranges strand | len wbps_gene_id gene_name
## <Rle> <IRanges> <Rle> | <integer> <character> <character>
## [1] chrII 2377652-2378851 - | 1200 WBGene00017521 F16G10.6
## [2] chrV 8767909-8769108 + | 1200 WBGene00015220 B0507.3
## [3] chrIV 3899504-3900703 + | 1200 WBGene00023478 srz-62
## [4] chrX 14790008-14791207 + | 1200 WBGene00009628 tatn-1
## [5] chrII 915080-916279 + | 1200 WBGene00019923 fbxc-29
## [6] chrII 4925049-4926248 - | 1200 WBGene00019895 aagr-2
## chip_signal_mean chip_signal_max log_chip_signal_mean log_chip_signal_max
## <numeric> <numeric> <numeric> <numeric>
## [1] 5915.25 15444.01 12.5664 13.9234
## [2] 2305.98 3950.32 11.2622 11.9811
## [3] 1910.68 4123.10 11.0090 12.0415
## [4] 1516.05 2514.83 10.7023 11.3483
## [5] 1538.22 3469.06 10.7214 11.7982
## [6] 1751.17 3384.64 10.8928 11.7636
## IDR_mean IDR_max IDR_value nlogq IDR_sum IDR_logTEN_max
## <numeric> <numeric> <numeric> <numeric> <numeric> <numeric>
## [1] 9558.01 15444.01 9719.27 3.12927 9529332 4.18876
## [2] 3186.10 3950.32 2912.77 3.12927 1965824 3.59663
## [3] 3184.86 4123.10 3796.41 3.12927 1859957 3.61522
## [4] 1906.45 2514.83 1319.22 3.12927 1593795 3.40051
## [5] 2869.27 3469.06 3165.58 3.12927 1526452 3.54021
## [6] 2713.72 3384.64 2952.32 3.12927 1476266 3.52951
## IDR_logTEN_mean IDR_logTEN_value IDR_logTEN_sum
## <numeric> <numeric> <numeric>
## [1] 3.98037 3.98763 6.97906
## [2] 3.50326 3.46431 6.29354
## [3] 3.50309 3.57937 6.26950
## [4] 3.28023 3.12032 6.20243
## [5] 3.45777 3.50045 6.18368
## [6] 3.43357 3.47016 6.16916
## -------
## seqinfo: 7 sequences (1 circular) from ce11 genome
Read in RNA-seq data, join promoters by wbps geneid, and then sort logFoldChange high to low.
# input file
rnaseq = read.csv(RNASEQ)
rownames(rnaseq) <- rnaseq$WBGeneID
mcols(gr.promoters) <- mcols(gr.promoters) %>%
cbind(rnaseq[gr.promoters$wbps_gene_id,2:6]) %>%
as.data.frame() %>%
dplyr::rename(IDR_nlogq = nlogq)
names(gr.promoters) <- gr.promoters$wbps_gene_id
# sort promoters high to low by log2FC
gr.promoters = gr.promoters[order(gr.promoters$log2FoldChange,decreasing=T)]
gr.promoters[,2:5]
## GRanges object with 19985 ranges and 4 metadata columns:
## seqnames ranges strand | wbps_gene_id gene_name
## <Rle> <IRanges> <Rle> | <character> <character>
## WBGene00016231 chrV 2579898-2581097 + | WBGene00016231 C29G2.3
## WBGene00021593 chrIII 1754743-1755942 - | WBGene00021593 Y46E12A.3
## WBGene00005522 chrV 18806310-18807509 - | WBGene00005522 sri-10
## WBGene00020384 chrV 5349372-5350571 - | WBGene00020384 T09D3.3
## WBGene00005548 chrII 2585394-2586593 - | WBGene00005548 sri-36
## ... ... ... ... . ... ...
## WBGene00304895 chrX 16952760-16953959 - | WBGene00304895 F52G3.8
## WBGene00303060 chrX 16962921-16964120 + | WBGene00303060 F52G3.7
## WBGene00016901 chrX 17309908-17311107 + | WBGene00016901 C53C11.4
## WBGene00303423 chrX 17361829-17363028 + | WBGene00303423 F10D7.11
## WBGene00304237 chrX 17567279-17568478 - | WBGene00304237 F39B3.7
## chip_signal_mean chip_signal_max
## <numeric> <numeric>
## WBGene00016231 -25.3792 -14.51178
## WBGene00021593 -21.1586 -17.62152
## WBGene00005522 -13.0670 -3.70544
## WBGene00020384 -29.8736 -18.35473
## WBGene00005548 58.7131 103.57881
## ... ... ...
## WBGene00304895 -25.47366 -13.938541
## WBGene00303060 100.15240 225.856720
## WBGene00016901 -4.03903 0.056697
## WBGene00303423 -16.65387 -2.584754
## WBGene00304237 -18.73224 -7.003493
## -------
## seqinfo: 7 sequences (1 circular) from ce11 genome
# look at the number filtered by DESeq2
# as described by https://bioconductor.org/packages/release/bioc/vignettes/DESeq2/inst/doc/DESeq2.html#pvaluesNA
baseMean_is_zero = rnaseq$baseMean == 0
pval_na = is.na(rnaseq$pvalue)
padj_na = is.na(rnaseq$padj)
# case one
sum(baseMean_is_zero & pval_na & padj_na)
## [1] 0
# case two
sum(!baseMean_is_zero & pval_na & padj_na)
## [1] 38
# case three
sum(!pval_na & padj_na)
## [1] 7713
# divide groups by peak and padj
enriched_intestine = gr.promoters$padj<.05 & !is.na(gr.promoters$padj) & gr.promoters$log2FoldChange > 0
has_peak = !is.nan(gr.promoters$IDR_max)
classA = enriched_intestine & has_peak
classB = !enriched_intestine & has_peak
classC = enriched_intestine & !has_peak
classD = !enriched_intestine & !has_peak
m = matrix( c(sum(classA),
sum(classB),
sum(classC),
sum(classD)), ncol = 2, byrow = T)
colnames(m) <- c("Peak","NO peak")
rownames(m) <- c("Int. expressed","NOT Int. expressed")
m
## Peak NO peak
## Int. expressed 1767 3276
## NOT Int. expressed 1170 13772
m.chisq = chisq.test(m)
m.chisq
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: m
## X-squared = 2224, df = 1, p-value <2e-16
display_table = rbind(m,m.chisq$expected)
pval.label=ifelse(m.chisq$p.value <2e-16, "<2e-16", sprintf("%.3f", .chisq$p.value))
knitr::kable(display_table,
digits = 1,
format.args=list(big.mark = ','),
caption=sprintf("%s: Chi-sq p-val: %s",
params$stage,
pval.label)) %>%
pack_rows(index = c("Observed" = 2, "Expected" = 2)) %>%
kable_styling(latex_options = "scale_down")
| Peak | NO peak | |
|---|---|---|
| Observed | ||
| Int. expressed | 1,767 | 3,276 |
| NOT Int. expressed | 1,170 | 13,772 |
| Expected | ||
| Int. expressed | 741 | 4,302 |
| NOT Int. expressed | 2,196 | 12,746 |
gr.promoters$class = "classA"
gr.promoters$class[classB] <- "classB"
gr.promoters$class[classC] <- "classC"
gr.promoters$class[classD] <- "classD"
sigmoid = function(z) {1 / (1 + exp(-z))}
dataset = as.data.frame(mcols(gr.promoters))
dataset$enriched = ifelse( dataset$padj < .05, 1, 0)
dataset = dataset %>% dplyr::filter(is.finite(enriched)) %>% arrange(log_chip_signal_mean)
#dataset = dataset[11:(nrow(dataset)-10),]
mod = glm(enriched ~ log_chip_signal_max, data=dataset[11:(nrow(dataset)-10),], family = "binomial")
dataset$prediction = sigmoid(predict(mod,newdata=dataset))
curve = data.frame(
log_chip_signal_max=seq(
min(dataset$log_chip_signal_max),
max(dataset$log_chip_signal_max),
length.out=50))
curve$y = sigmoid(predict(mod, newdata=curve))
dataset=dataset %>% arrange(log_chip_signal_max)
ggplot(dataset[11:(nrow(dataset)-10),], aes(x=log_chip_signal_max, y=enriched)) +
geom_jitter(alpha=.1) +
geom_line(data=curve, aes(x=log_chip_signal_max, y=y),inherit.aes=F) +
labs(title=sprintf("%s: Does promoter signal max predict intestine expression?",params$stage),
y="pr(intestine enriched)")
mod = glm(enriched ~ log_chip_signal_mean, data=dataset[11:(nrow(dataset)-10),], family = "binomial")
dataset$prediction = sigmoid(predict(mod,newdata=dataset))
curve = data.frame(
log_chip_signal_mean=seq(
min(dataset$log_chip_signal_mean),
max(dataset$log_chip_signal_mean),
length.out=50))
curve$y = sigmoid(predict(mod, newdata=curve))
dataset=dataset %>% arrange(log_chip_signal_mean)
ggplot(dataset[11:(nrow(dataset)-10),], aes(x=log_chip_signal_mean, y=enriched)) +
geom_point(alpha=.1) +
geom_line(data=curve, aes(x=log_chip_signal_mean, y=y),inherit.aes=F) +
labs(title=sprintf("%s: Does promoter signal mean/sum predict intestine expression?",params$stage),y="pr(intestine enriched)")
ROC = function(scores, truth) {
nPos = sum(truth)
nNeg = sum(!truth)
# True negatives
TN = function(i) {
ecdf(scores[!truth])(i) * nNeg
}
# False negatives
FN = function(i) {
ecdf(scores[truth])(i) * nPos
}
# False positives
FP = function(i) {
nNeg - TN(i)
}
# True positives
TP = function(i) {
nPos - FN(i)
}
FPR = function(i) {
FP(i)/(FP(i) + TN(i))
}
# Sensitivity
Sn = function(i) {
TP(i)/(TP(i) + FN(i)) # efficiency of recovering True Positives
}
Sp = function(i) {
TN(i)/(TN(i) + FP(i)) # success at excluding set B (True Negatives)
}
# Positive predictive value
PPV = function(i) {
TP(i)/(TP(i) + FP(i)) # proportion of called positives that are correct
}
# False positive rate
FPR = function(i) {
FP(i)/(FP(i)+TN(i)) # failure to exclude set B (True Negatives)
}
list(Sp=Sp,Sn=Sn,TP=TP,FP=FP,TN=TN,FN=FN,PPV=PPV,TPR=Sn,FPR=FPR)
}
dataset = as.data.frame(mcols(gr.promoters[classA|classB]) )
dataset = dataset[order(dataset$IDR_logTEN_sum),]
dataset$class = factor(dataset$class, levels=c("classA","classB"))
thresholds = seq(min(dataset$IDR_logTEN_sum), max(dataset$IDR_logTEN_sum), length.out = 100)
ROC.IDR.sum = ROC(dataset$IDR_logTEN_sum, dataset$class == 'classA')
TPR=ROC.IDR.sum$TPR
FPR=ROC.IDR.sum$FPR
TP = ROC.IDR.sum$TP
FP = ROC.IDR.sum$FP
TPRvsFPR = TPR(thresholds) - FPR(thresholds)
best=max(TPRvsFPR)
best_threshold = thresholds[ which(TPRvsFPR == best)]
ggplot(data.frame(threshold=thresholds,TPRvsFPR=TPRvsFPR),
aes(x=threshold,y=TPRvsFPR)) +
geom_line() +
geom_vline(xintercept=best_threshold) +
ggtitle(sprintf("%s: Using IDR sum score to recover classA (TPR) \nand exclude classB (FPR) ", params$stage)) + ylab("TPR - FPR")
df = data.frame()
for (funcname in names(ROC.IDR.sum)) {
f = ROC.IDR.sum[[funcname]]
df = df %>% rbind( data.frame(threshold=thresholds,measure=funcname,value=f(thresholds)))
}
df2 = df %>% filter(measure %in% c("TPR","FPR"))
df2$measure = factor(df2$measure, levels=c("TPR","FPR"))
ggplot(df2, aes(x=threshold,y=value, color=measure)) + geom_line() +
scale_color_manual(values=c("black","red")) +
geom_segment(x=best_threshold,xend=best_threshold,
y=FPR(best_threshold), yend=TPR(best_threshold),color='black') +
geom_point(x=best_threshold, y=TPR(best_threshold), color="black", size=.75) +
geom_point(x=best_threshold, y=FPR(best_threshold), color="black", size=.75) +
ggtitle("Positive predictive value of using peak signal to predict binding") +
geom_text(inherit.aes=F,aes(x=best_threshold, y=TPR(best_threshold)),
position=position_nudge(x=.6), label=sprintf("recover: %d/%d classA\nexclude: %d/%d classB", TP(best_threshold),sum(dataset$class == 'classA'), FP(best_threshold),sum(dataset$class == 'classB')))
data.frame(IDR_sum=thresholds, TPR=ROC.IDR.sum$TPR(thresholds), FPR=ROC.IDR.sum$FPR(thresholds)) %>%
ggplot(aes(x=FPR,y=TPR)) + geom_line() +
geom_point(x=FPR(best_threshold),y=TPR(best_threshold), size=.75) +
geom_abline(slope=1) + ggtitle("ROC for IDR sum")
ROC.df = data.frame(TPR=ROC.IDR.sum$TPR(thresholds),FPR=ROC.IDR.sum$FPR(thresholds),score="IDR sum")
IDR_SUM_BEST = which(thresholds == best_threshold)
thresholds = seq(min(dataset$IDR_logTEN_max), max(dataset$IDR_logTEN_max), length.out = 100)
ROC.IDR.max = ROC(dataset$IDR_logTEN_max, dataset$class == 'classA')
TPR=ROC.IDR.max$TPR
FPR=ROC.IDR.max$FPR
TP = ROC.IDR.max$TP
FP = ROC.IDR.max$FP
TPRvsFPR = TPR(thresholds) - FPR(thresholds)
best=max(TPRvsFPR)
best_threshold = thresholds[ which(TPRvsFPR == best)]
ggplot(data.frame(threshold=thresholds,TPRvsFPR=TPRvsFPR),
aes(x=threshold,y=TPRvsFPR)) +
geom_line() +
geom_vline(xintercept=best_threshold) +
ggtitle(sprintf("%s: Using IDR max score to recover classA (TPR) \nand exclude classB (FPR) ", params$stage)) + ylab("TPR - FPR")
df = data.frame()
for (funcname in names(ROC.IDR.max)) {
f = ROC.IDR.max[[funcname]]
df = df %>% rbind( data.frame(threshold=thresholds,measure=funcname,value=f(thresholds)))
}
df2 = df %>% filter(measure %in% c("TPR","FPR"))
df2$measure = factor(df2$measure, levels=c("TPR","FPR"))
ggplot(df2, aes(x=threshold,y=value, color=measure)) + geom_line() +
scale_color_manual(values=c("black","red")) +
geom_segment(x=best_threshold,xend=best_threshold,
y=FPR(best_threshold), yend=TPR(best_threshold),color='black') +
geom_point(x=best_threshold, y=TPR(best_threshold), color="black", size=.75) +
geom_point(x=best_threshold, y=FPR(best_threshold), color="black", size=.75) +
ggtitle("Positive predictive value of using peak signal to predict binding") +
geom_text(inherit.aes=F,aes(x=best_threshold, y=TPR(best_threshold)),
position=position_nudge(x=.6), label=sprintf("recover: %d/%d classA\nexclude: %d/%d classB",
round(TP(best_threshold)),
sum(dataset$class == 'classA'),
round(FP(best_threshold)),
sum(dataset$class == 'classB')))
data.frame(IDR_max=thresholds, TPR=ROC.IDR.max$TPR(thresholds), FPR=ROC.IDR.max$FPR(thresholds)) %>%
ggplot(aes(x=FPR,y=TPR)) + geom_line() +
geom_point(x=FPR(best_threshold),y=TPR(best_threshold), size=.75) +
geom_abline(slope=1)
ROC.df = ROC.df %>%
rbind(data.frame(TPR=ROC.IDR.max$TPR(thresholds),FPR=ROC.IDR.max$FPR(thresholds),score="IDR max"))
IDR_MAX_BEST = which(thresholds == best_threshold)
ggplot(ROC.df, aes(x=FPR,y=TPR, color=score)) + geom_line() + geom_abline(slope=1)
thresholds = seq(min(dataset$IDR_logTEN_value), max(dataset$IDR_logTEN_value), length.out = 100)
ROC.IDR.value = ROC(dataset$IDR_logTEN_value, dataset$class == 'classA')
TPR=ROC.IDR.value$TPR
FPR=ROC.IDR.value$FPR
TP = ROC.IDR.value$TP
FP = ROC.IDR.value$FP
TPRvsFPR = TPR(thresholds) - FPR(thresholds)
best=max(TPRvsFPR)
best_threshold = thresholds[ which(TPRvsFPR == best)]
ROC.df = ROC.df %>%
rbind(data.frame(TPR=ROC.IDR.value$TPR(thresholds),FPR=ROC.IDR.value$FPR(thresholds),score="IDR value"))
IDR_VALUE_BEST = which(thresholds == best_threshold)
ggplot(ROC.df, aes(x=FPR,y=TPR, color=score)) + geom_line() + geom_abline(slope=1) + ggtitle(sprintf("%s: ROC predictive power of IDR score to predict intestine expression", params$stage))
thresholds = seq(min(dataset$IDR_logTEN_mean), max(dataset$IDR_logTEN_mean), length.out = 100)
ROC.IDR.mean = ROC(dataset$IDR_logTEN_mean, dataset$class == 'classA')
TPR=ROC.IDR.mean$TPR
FPR=ROC.IDR.mean$FPR
TP = ROC.IDR.mean$TP
FP = ROC.IDR.mean$FP
TPRvsFPR = TPR(thresholds) - FPR(thresholds)
best=max(TPRvsFPR)
best_threshold = thresholds[ which(TPRvsFPR == best)]
ROC.df = ROC.df %>%
rbind(data.frame(TPR=ROC.IDR.mean$TPR(thresholds),FPR=ROC.IDR.mean$FPR(thresholds),score="IDR mean"))
IDR_MEAN_best = which(thresholds == best_threshold)
ggplot(ROC.df, aes(x=FPR,y=TPR, color=score)) + geom_line() + geom_abline(slope=1) + ggtitle(sprintf("%s ROC: predictive power of IDR score to predict intestine expression", params$stage))
kable(ROC.df %>% group_by(score) %>% summarize(best = max(TPR-FPR)), caption=sprintf("%s best TPR-FPR", params$stage), digits=3)
| score | best |
|---|---|
| IDR max | 0.289 |
| IDR mean | 0.294 |
| IDR sum | 0.242 |
| IDR value | 0.252 |
library(MASS)
# IDR sum in peak
classAB.lda = lda(as.matrix(dataset$IDR_logTEN_sum), dataset$class)
dataset$IDR_logTEN_sum.ghat = predict(classAB.lda)$class
mean(dataset$IDR_logTEN_sum.ghat != dataset$class)
## [1] 0.336
ggplot(dataset, aes(x=IDR_logTEN_sum, fill=class)) + geom_density(alpha=.5)
ggplot(dataset, aes(x=IDR_logTEN_mean, fill=class)) + geom_density(alpha=.5)
classAB.lda = lda(as.matrix(dataset$IDR_logTEN_max), dataset$class)
dataset$IDR_logTEN_max.ghat = predict(classAB.lda)$class
mean(dataset$IDR_logTEN_max.ghat != dataset$class)
## [1] 0.332
changepoint = dataset %>% filter(IDR_logTEN_max.ghat == 'classB') %>% pull(IDR_logTEN_max) %>% max()
total_greater_changepoint = dataset %>%
filter(IDR_logTEN_max > changepoint) %>%
nrow()
frac = dataset %>% filter(IDR_logTEN_max > changepoint) %>%
group_by(class) %>%
summarize(count=n(), frac=n()/total_greater_changepoint) %>%
filter(class == "classA") %>% pull(frac)
ggplot(dataset, aes(x=IDR_logTEN_max, fill=class)) +
geom_density(alpha=.5) +
geom_vline(xintercept=changepoint) +
scale_fill_brewer(palette="Spectral")
dataset=dataset %>% arrange(IDR_logTEN_max)
isClassA = dataset$class == "classA"
incrClassA = cumsum(isClassA)
incrClassB = cumsum(!isClassA)
dataset$incrClassA = incrClassA
dataset$incrClassB = incrClassB
dataset$fClassA = incrClassA/(1:nrow(dataset))
ggplot(dataset, aes(x=IDR_logTEN_max, y=incrClassA)) + geom_line() + geom_line(aes(y=incrClassB),color='orange')
thresholds = seq(min(dataset$IDR_logTEN_max), max(dataset$IDR_logTEN_max), length.out=24)
f.all = function(x) { 1-ecdf(dataset$IDR_logTEN_max)(x) }
called_positives = f.all(thresholds)*nrow(dataset)
f.classA = function(x) {1-ecdf(dataset$IDR_logTEN_max[dataset$class == 'classA'])(x)}
actual_positives = f.classA(thresholds)*sum(dataset$class == 'classA')
dataset$enriched = ifelse( dataset$padj < .05, 1, 0)
dataset = dataset %>% filter(is.finite(enriched)) %>% arrange(IDR_logTEN_sum)
mod = glm(enriched ~ IDR_logTEN_sum, data=dataset[11:(nrow(dataset)-10),], family = "binomial")
curve = data.frame(
IDR_logTEN_sum=seq(
min(dataset$IDR_logTEN_sum),
max(dataset$IDR_logTEN_sum),
length.out=50))
curve$y = sigmoid(predict(mod, newdata=curve))
dataset$prediction = sigmoid(predict(mod, newdata=dataset))
dataset=dataset %>% arrange(IDR_logTEN_sum)
ggplot(dataset, aes(x=IDR_logTEN_sum, y=enriched)) +
geom_point(alpha=.3) +
geom_line(data=curve, aes(x=IDR_logTEN_sum, y=y),inherit.aes=F) + labs(title=sprintf("%s: Does peak signal sum predict intestine expression?",params$stage), y="pr(intestine enriched)")
mod = glm(enriched ~ IDR_logTEN_mean, data=dataset[11:(nrow(dataset)-10),], family = "binomial")
curve = data.frame(
IDR_logTEN_mean=seq(
min(dataset$IDR_logTEN_mean),
max(dataset$IDR_logTEN_mean),
length.out=50))
curve$y = sigmoid(predict(mod, newdata=curve))
dataset$prediction = sigmoid(predict(mod,newdata=dataset))
dataset=dataset %>% arrange(IDR_logTEN_mean)
ggplot(dataset, aes(x=IDR_logTEN_mean, y=enriched)) +
geom_point(alpha=.3) +
geom_line(data=curve, aes(x=IDR_logTEN_mean, y=y),inherit.aes=F) +
labs(title=sprintf("%s: Does peak signal mean predict intestine expression?",params$stage),y="pr(intestine enriched)")
mod = glm(enriched ~ IDR_logTEN_max, data=dataset, family = "binomial")
curve = data.frame(
IDR_logTEN_max=seq(
min(dataset$IDR_logTEN_max),
max(dataset$IDR_logTEN_max),
length.out=50))
curve$y = sigmoid(predict(mod, newdata=curve))
dataset$prediction = sigmoid(predict(mod))
dataset=dataset %>% arrange(IDR_logTEN_max)
ggplot(dataset, aes(x=IDR_logTEN_max, y=enriched)) +
geom_point(alpha=.3) +
geom_line(data=curve, aes(x=IDR_logTEN_max, y=y),inherit.aes=F) +
labs(title=sprintf("%s: Does peak signal max predict intestine expression?",params$stage),y="pr(intestine enriched)")
mod = glm(enriched ~ IDR_logTEN_value, data=dataset, family = "binomial")
curve = data.frame(
IDR_logTEN_value=seq(
min(dataset$IDR_logTEN_value),
max(dataset$IDR_logTEN_value),
length.out=50))
curve$y = sigmoid(predict(mod, newdata=curve))
dataset$prediction = sigmoid(predict(mod))
dataset=dataset %>% arrange(IDR_logTEN_value)
ggplot(dataset, aes(x=IDR_logTEN_value, y=enriched)) +
geom_point(alpha=.3) +
geom_line(data=curve, aes(x=IDR_logTEN_value, y=y),inherit.aes=F) +
labs(title=sprintf("%s: Does peak IDR value predict intestine expression?",params$stage), y="pr(intestine enriched)")
promoters.hilo = as.data.frame(gr.promoters)
PROMOTERS_HILO_BED_PATH = file.path(OUTPUT_03, sprintf("%s.promoters.hilo.bed", params$stage))
PROMOTERS_HILO_TSV_PATH = file.path(OUTPUT_03, sprintf("%s.promoters.hilo.tsv", params$stage))
PROMOTERS_HILO_BED_PATH_A = file.path(OUTPUT_03, sprintf("%s.promoters.hilo.classA.bed", params$stage))
PROMOTERS_HILO_BED_PATH_B = file.path(OUTPUT_03, sprintf("%s.promoters.hilo.classB.bed", params$stage))
PROMOTERS_HILO_BED_PATH_C = file.path(OUTPUT_03, sprintf("%s.promoters.hilo.classC.bed", params$stage))
PROMOTERS_HILO_BED_PATH_D = file.path(OUTPUT_03, sprintf("%s.promoters.hilo.classD.bed", params$stage))
# BED format
write.table(promoters.hilo, PROMOTERS_HILO_BED_PATH, quote=F, sep="\t", row.names=F, col.names=F)
# Matrix format readable into R
write.table(promoters.hilo, PROMOTERS_HILO_TSV_PATH, quote=F, sep="\t", row.names=T, col.names=T)
write.table(promoters.hilo[classA,],
PROMOTERS_HILO_BED_PATH_A, quote=F, sep="\t", row.names=F, col.names=F)
write.table(promoters.hilo[classB,],
PROMOTERS_HILO_BED_PATH_B, quote=F, sep="\t",
row.names=F, col.names=F)
write.table(promoters.hilo[classC,],
PROMOTERS_HILO_BED_PATH_C, quote=F, sep="\t",
row.names=F, col.names=F)
write.table(promoters.hilo[classD,],
PROMOTERS_HILO_BED_PATH_D, quote=F, sep="\t",
row.names=F, col.names=F)
#### deeptooling up versus down only, no other filters
promoters.hilo.up = promoters.hilo %>% dplyr::filter(log2FoldChange > 0)
promoters.hilo.down = promoters.hilo %>% dplyr::filter(log2FoldChange < 0)
PROMOTERS_HILO_BED_PATH_UP = file.path(OUTPUT_03,
sprintf("%s.promoters.hilo.up.bed",params$stage))
PROMOTERS_HILO_BED_PATH_DOWN = file.path(OUTPUT_03,
sprintf("%s.promoters.hilo.down.bed",params$stage))
write.table(promoters.hilo.up,
PROMOTERS_HILO_BED_PATH_UP,
quote=F,
sep="\t",
row.names=F, col.names=F)
write.table(promoters.hilo.down,
PROMOTERS_HILO_BED_PATH_DOWN,
quote=F,
sep="\t",
row.names=F, col.names=F)
wtf.wbid = read.table('../01_input/wtf3.wbid')[[1]]
in_WTF = gr.promoters$wbps_gene_id %in% wtf.wbid
PROMOTERS_HILO_BED_PATH_A_TF = file.path(OUTPUT_03, sprintf("%s.promoters.hilo.classA.TF.bed", params$stage))
PROMOTERS_HILO_BED_PATH_B_TF = file.path(OUTPUT_03, sprintf("%s.promoters.hilo.classB.TF.bed", params$stage))
PROMOTERS_HILO_BED_PATH_C_TF = file.path(OUTPUT_03, sprintf("%s.promoters.hilo.classC.TF.bed", params$stage))
PROMOTERS_HILO_BED_PATH_D_TF = file.path(OUTPUT_03, sprintf("%s.promoters.hilo.classD.TF.bed", params$stage))
write.table(promoters.hilo[classA&in_WTF,],
PROMOTERS_HILO_BED_PATH_A_TF, quote=F, sep="\t", row.names=F, col.names=F)
write.table(promoters.hilo[classB&in_WTF,],
PROMOTERS_HILO_BED_PATH_B_TF, quote=F, sep="\t",
row.names=F, col.names=F)
write.table(promoters.hilo[classC&in_WTF,],
PROMOTERS_HILO_BED_PATH_C_TF, quote=F, sep="\t",
row.names=F, col.names=F)
write.table(promoters.hilo[classD&in_WTF,],
PROMOTERS_HILO_BED_PATH_D_TF, quote=F, sep="\t",
row.names=F, col.names=F)
To produce the deeptools output, execute DEEPTOOLS.bash.
It will compute promoters.hilo.mx and promoters.hilo.pdf.
Deeptools PDFs indicate a font called dejavu, if you’re tired of replacing it in Illustrator, install it from: https://sourceforge.net/projects/dejavu/
# data and params
Sys.setenv(UPSTREAM=UPSTREAM,
DOWNSTREAM=DOWNSTREAM,
INTERP_SIGNAL_BW=INTERP_SIGNAL_BW,
PROMOTERS_HILO_BED_PATH=PROMOTERS_HILO_BED_PATH,
PROMOTERS_HILO_BED_PATH_A=PROMOTERS_HILO_BED_PATH_A,
PROMOTERS_HILO_BED_PATH_B=PROMOTERS_HILO_BED_PATH_B,
PROMOTERS_HILO_BED_PATH_C=PROMOTERS_HILO_BED_PATH_C,
PROMOTERS_HILO_BED_PATH_D=PROMOTERS_HILO_BED_PATH_D,
PROMOTERS_HILO_BED_PATH_A_TF=PROMOTERS_HILO_BED_PATH_A_TF,
PROMOTERS_HILO_BED_PATH_B_TF=PROMOTERS_HILO_BED_PATH_B_TF,
PROMOTERS_HILO_BED_PATH_C_TF=PROMOTERS_HILO_BED_PATH_C_TF,
PROMOTERS_HILO_BED_PATH_D_TF=PROMOTERS_HILO_BED_PATH_D_TF)
# output files
PROMOTERS_HILO_UPDOWN_MX = file.path(OUTPUT_03,
sprintf("%s.promoters.hilo.updown.mx",params$stage))
PROMOTERS_CLASSES_UPDOWN_MX = file.path(OUTPUT_03,
sprintf("%s.promoters.hilo.CLASSES.mx",params$stage))
PROMOTERS_TF_CLASSES_UPDOWN_MX = file.path(OUTPUT_03,
sprintf("%s.promoters.hilo.TF.CLASSES.mx",params$stage))
PROMOTERS_HILO_UPDOWN_PDF = file.path(OUTPUT_03,
sprintf("%s.promoters.hilo.updown.pdf",params$stage))
PROMOTERS_CLASSES_UPDOWN_PDF = file.path(OUTPUT_03, sprintf("%s.promoters.hilo.CLASSES.pdf",params$stage))
PROMOTERS_TF_CLASSES_UPDOWN_PDF = file.path(OUTPUT_03, sprintf("%s.promoters.hilo.TF.CLASSES.pdf",params$stage))
Sys.setenv(PROMOTERS_HILO_BED_PATH_UP=PROMOTERS_HILO_BED_PATH_UP,
PROMOTERS_HILO_BED_PATH_DOWN=PROMOTERS_HILO_BED_PATH_DOWN,
PROMOTERS_HILO_UPDOWN_MX=PROMOTERS_HILO_UPDOWN_MX,
PROMOTERS_CLASSES_UPDOWN_MX=PROMOTERS_CLASSES_UPDOWN_MX,
PROMOTERS_HILO_UPDOWN_PDF=PROMOTERS_HILO_UPDOWN_PDF,
PROMOTERS_CLASSES_UPDOWN_PDF=PROMOTERS_CLASSES_UPDOWN_PDF,
PROMOTERS_TF_CLASSES_UPDOWN_MX=PROMOTERS_TF_CLASSES_UPDOWN_MX,
PROMOTERS_TF_CLASSES_UPDOWN_PDF=PROMOTERS_TF_CLASSES_UPDOWN_PDF
)
source $HOME/.bash_profile
conda activate derptools # yaml environ in 02_scripts/conda_envs
pwd
set -ue # exit 1 if any vars are not set (using Sys.setenv in prev chunks)
echo PROMOTERS_HILO_BED_PATH $PROMOTERS_HILO_BED_PATH
echo PROMOTERS_HILO_BED_PATH_A_TF $PROMOTERS_HILO_BED_PATH_A_TF
echo PROMOTERS_HILO_BED_PATH_B_TF $PROMOTERS_HILO_BED_PATH_B_TF
echo PROMOTERS_HILO_BED_PATH_C_TF $PROMOTERS_HILO_BED_PATH_C_TF
echo PROMOTERS_HILO_BED_PATH_D_TF $PROMOTERS_HILO_BED_PATH_D_TF
echo PROMOTERS_TF_CLASSES_UPDOWN_MX $PROMOTERS_TF_CLASSES_UPDOWN_MX
echo PROMOTERS_TF_CLASSES_UPDOWN_PDF $PROMOTERS_TF_CLASSES_UPDOWN_PDF
BODYLENGTH=$(($UPSTREAM + $DOWNSTREAM))
# real 1m59.354s
# user 3m47.980s
# sys 0m2.663s
if true
then
time computeMatrix scale-regions --regionBodyLength $BODYLENGTH \
--startLabel 'up-1Kb' \
--endLabel down+200 \
--beforeRegionStartLength $UPSTREAM\
--afterRegionStartLength $DOWNSTREAM\
-R $PROMOTERS_HILO_BED_PATH_A_TF $PROMOTERS_HILO_BED_PATH_B_TF $PROMOTERS_HILO_BED_PATH_C_TF $PROMOTERS_HILO_BED_PATH_D_TF\
-S $INTERP_SIGNAL_BW\
-p 4 -o $PROMOTERS_TF_CLASSES_UPDOWN_MX
fi
plotHeatmap --plotTitle "WTF class A-D regions sorted by logFC expression high to low for ${STAGE}"\
--matrixFile $PROMOTERS_TF_CLASSES_UPDOWN_MX\
-out $PROMOTERS_TF_CLASSES_UPDOWN_PDF\
--sortRegions no\
--colorMap RdYlBu_r\
--startLabel '' --endLabel ''\
--regionsLabel 'peak+int. enrich.' 'peak+ NOT int. enrich.' 'NO peak + int. enrich.' 'NO peak + NOT int. enrich.'\
--samplesLabel 'ELT-2 signal (reps. combined subtracted)'\
gr.promoters.classA = gr.promoters[classA]
# scatter plot with linear mods on logFC up and down separately
gr.promoters.classA %>% as.data.frame() %>%
ggplot(
aes(x=log_chip_signal_max,
y=log2FoldChange,
group=log2FoldChange>0)) + geom_point() +
geom_smooth(method='lm', formula= y~x) +
ggtitle("Peak + Intestine Enriched")
classA.up = promoters.hilo %>% as.data.frame() %>% dplyr::filter(classA & log2FoldChange > 0)
up.table = classA.up[,c('log2FoldChange',
'chip_signal_mean',
'chip_signal_max',
'log_chip_signal_mean',
'log_chip_signal_max',
'IDR_mean', 'IDR_max', 'IDR_value')]
cor.up.table = cor(up.table)
options(digits=3)
knitr::kable(cor.up.table, caption="Pairwise correlations")
| log2FoldChange | chip_signal_mean | chip_signal_max | log_chip_signal_mean | log_chip_signal_max | IDR_mean | IDR_max | IDR_value | |
|---|---|---|---|---|---|---|---|---|
| log2FoldChange | 1.000 | 0.128 | 0.137 | 0.141 | 0.154 | 0.151 | 0.144 | 0.161 |
| chip_signal_mean | 0.128 | 1.000 | 0.956 | 0.954 | 0.894 | 0.919 | 0.922 | 0.794 |
| chip_signal_max | 0.137 | 0.956 | 1.000 | 0.918 | 0.929 | 0.960 | 0.965 | 0.877 |
| log_chip_signal_mean | 0.141 | 0.954 | 0.918 | 1.000 | 0.959 | 0.902 | 0.887 | 0.756 |
| log_chip_signal_max | 0.154 | 0.894 | 0.929 | 0.959 | 1.000 | 0.920 | 0.902 | 0.798 |
| IDR_mean | 0.151 | 0.919 | 0.960 | 0.902 | 0.920 | 1.000 | 0.994 | 0.924 |
| IDR_max | 0.144 | 0.922 | 0.965 | 0.887 | 0.902 | 0.994 | 1.000 | 0.919 |
| IDR_value | 0.161 | 0.794 | 0.877 | 0.756 | 0.798 | 0.924 | 0.919 | 1.000 |
cor.test(classA.up[,'log2FoldChange'],classA.up[,'IDR_mean'])
##
## Pearson's product-moment correlation
##
## data: classA.up[, "log2FoldChange"] and classA.up[, "IDR_mean"]
## t = 6, df = 1765, p-value = 2e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.105 0.196
## sample estimates:
## cor
## 0.151
cor.test(classA.up[,'log2FoldChange'],classA.up[,'log_chip_signal_mean'])
##
## Pearson's product-moment correlation
##
## data: classA.up[, "log2FoldChange"] and classA.up[, "log_chip_signal_mean"]
## t = 6, df = 1765, p-value = 3e-09
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.0949 0.1863
## sample estimates:
## cor
## 0.141
sessionInfo()
## R version 4.1.2 (2021-11-01)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Monterey 12.2.1
##
## Matrix products: default
## LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] MASS_7.3-55 dplyr_1.0.8 kableExtra_1.3.4
## [4] tidyr_1.2.0 ggplot2_3.3.5 GenomicRanges_1.46.1
## [7] GenomeInfoDb_1.30.1 IRanges_2.28.0 S4Vectors_0.32.3
## [10] BiocGenerics_0.40.0 biomaRt_2.50.3 ParasiteXML_0.0.0.9000
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-155 bitops_1.0-7 bit64_4.0.5
## [4] filelock_1.0.2 webshot_0.5.2 RColorBrewer_1.1-2
## [7] progress_1.2.2 httr_1.4.2 tools_4.1.2
## [10] bslib_0.3.1 utf8_1.2.2 R6_2.5.1
## [13] DBI_1.1.2 mgcv_1.8-38 colorspace_2.0-3
## [16] withr_2.4.3 tidyselect_1.1.2 prettyunits_1.1.1
## [19] bit_4.0.4 curl_4.3.2 compiler_4.1.2
## [22] cli_3.2.0 rvest_1.0.2 Biobase_2.54.0
## [25] xml2_1.3.3 labeling_0.4.2 sass_0.4.0
## [28] scales_1.1.1 rappdirs_0.3.3 systemfonts_1.0.4
## [31] stringr_1.4.0 digest_0.6.29 rmarkdown_2.11
## [34] svglite_2.1.0 XVector_0.34.0 pkgconfig_2.0.3
## [37] htmltools_0.5.2 dbplyr_2.1.1 fastmap_1.1.0
## [40] highr_0.9 rlang_1.0.1 rstudioapi_0.13
## [43] RSQLite_2.2.10 jquerylib_0.1.4 farver_2.1.0
## [46] generics_0.1.2 jsonlite_1.7.3 RCurl_1.98-1.6
## [49] magrittr_2.0.2 GenomeInfoDbData_1.2.7 Matrix_1.4-0
## [52] Rcpp_1.0.8 munsell_0.5.0 fansi_1.0.2
## [55] lifecycle_1.0.1 stringi_1.7.6 yaml_2.3.5
## [58] zlibbioc_1.40.0 BiocFileCache_2.2.1 grid_4.1.2
## [61] blob_1.2.2 crayon_1.5.0 lattice_0.20-45
## [64] Biostrings_2.62.0 splines_4.1.2 hms_1.1.1
## [67] KEGGREST_1.34.0 knitr_1.37 pillar_1.7.0
## [70] XML_3.99-0.8 glue_1.6.1 evaluate_0.15
## [73] png_0.1-7 vctrs_0.3.8 gtable_0.3.0
## [76] purrr_0.3.4 assertthat_0.2.1 cachem_1.0.6
## [79] xfun_0.29 viridisLite_0.4.0 tibble_3.1.6
## [82] AnnotationDbi_1.56.2 memoise_2.0.1 ellipsis_0.3.2